/*******************************************************************************
	- Simulation of performance changes by the end of century
*******************************************************************************/

********************************************************************************
* initial settings
********************************************************************************

version 14.2
clear 
drop _all
set more off

*----------------------------------

// set your own local path where you put the JAERE_Submission folder
if c(username) == "zhenx" {
	global LOCALPATH "C:/Users/zhenx/Dropbox/Research/Temperature and Athletes/temp_acclimatization/Code" 
}
global PATH "$LOCALPATH/JAERE_Submission"

********************************************************************************
* Table 2: estimated performance change by 2100 from RCP8.5 Climate Warming
********************************************************************************

*** set results folder
	cd "$PATH/Results/Simulations/Baseline"

*----------------------------regression estimates------------------------------*

*** load data set 
	use "$PATH/Data/TF_Data.dta", clear

*** define control variables and fixed effects
	global weather_control wind Meet_preciptotal Meet_ozone travel_dist 
	global absorb_var athlete season_week event_code##(venue home1) year_code i.Meet_dew_bin

*** drop training temperature lower than 40F to mitigate noise
	/*avoid the case where athletes might get trained indoors during extreme cold weathers*/
	drop if Team_tavg <= 40 

*** drop the observations that have missing values on key variables
	drop if Meet_tavg == . | Team_tavg == . | Meet_preciptotal ==. | Meet_ozone == .

*** set clustered standard error
	global cluster_se meet_code athlete

*** keep only endurance events
	keep if event_type == "Endurance"

*** construct linear splines for pre-exposure temperatures
	mkspline team_linspl1 50 team_linspl2 60 team_linspl3 = Team_tavg, di

*** linear spline with 3 knots
	cap drop meet_3linspl*
	mkspline meet_3linspl1 50 meet_3linspl2 60 meet_3linspl3 70 meet_3linspl4  = Meet_tavg, di

	reghdfe norm_result c.meet_3linspl* $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
	estimates save "Semi_Parametric_Linear_Spline_3knots_Homo", replace

	reghdfe norm_result c.meet_3linspl* (c.team_linspl1 c.team_linspl2 c.team_linspl3)#c.meet_3linspl* $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
	estimates save "Semi_Parametric_Linear_Spline_3knots_Hetero", replace

*** quadratic
	reghdfe norm_result Meet_tavg c.Meet_tavg#c.Meet_tavg $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
	estimates save "Semi_Parametric_Quadratic_Homo", replace

	reghdfe norm_result Meet_tavg c.Meet_tavg#c.Meet_tavg (c.team_linspl1 c.team_linspl2 c.team_linspl3)#(c.Meet_tavg c.Meet_tavg#c.Meet_tavg) $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
	estimates save "Semi_Parametric_Quadratic_Hetero", replace

*** linear spline with 2 knots 
	mkspline meet_linspl1 50 meet_linspl2 60 meet_linspl3 = Meet_tavg, di

	reghdfe norm_result c.meet_linspl* $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
	estimates save "Semi_Parametric_Linear_Spline_2Knots_Homo", replace

	reghdfe norm_result c.meet_linspl* (c.team_linspl1 c.team_linspl2 c.team_linspl3)#c.meet_linspl* $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
	estimates save "Semi_Parametric_Linear_Spline_2Knots_Hetero", replace

*** cubic spline with 3 knots
	mkspline2 meet_cubspl= Meet_tavg, cubic di knots(49.6 56.7 63.8)

	reghdfe norm_result c.meet_cubspl* $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
	estimates save "Semi_Parametric_Cubic_Spline_Homo", replace

	reghdfe norm_result c.meet_cubspl* (c.team_linspl1 c.team_linspl2 c.team_linspl3)#c.meet_cubspl* $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
	estimates save "Semi_Parametric_Cubic_Spline_Hetero", replace

*** cubic
	reghdfe norm_result Meet_tavg c.Meet_tavg#c.Meet_tavg c.Meet_tavg#c.Meet_tavg#c.Meet_tavg $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
	estimates save "Semi_Parametric_Cubic_Homo", replace

	reghdfe norm_result Meet_tavg c.Meet_tavg#c.Meet_tavg c.Meet_tavg#c.Meet_tavg#c.Meet_tavg (c.team_linspl1 c.team_linspl2 c.team_linspl3)#(c.Meet_tavg c.Meet_tavg#c.Meet_tavg c.Meet_tavg#c.Meet_tavg#c.Meet_tavg) $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
	estimates save "Semi_Parametric_Cubic_Hetero", replace

*-----------------------calculate performance changes--------------------------*
	
*** load data set 
	use "$PATH/Data/TF_Data.dta", clear

*** keep only endurance events
	keep if event_type == "Endurance"

*** drop training temperature lower than 40F to mitigate noise
	/*avoid the case where athletes might get trained indoors during extreme cold weathers*/
	drop if Team_tavg <= 40 

*** set scenario
	gen Meet_GDDPtemp = Meet_RCP85temp
	gen Team_GDDPtemp = Team_RCP85temp 

*** only keep weather variables
	keep athlete Meet_tavg Team_tavg Meet_GDDPtemp Team_GDDPtemp Team_zip norm_result

*** set other control variables at the mean level
	gen wind = -0.0000445
	gen Meet_preciptotal = 0.0819
    gen Meet_ozone = 0.0367131
    gen travel_dist = 277.2341

*** drop observations with missing data
	egen rowmiss = rowmiss(Meet_tavg Team_tavg Meet_GDDPtemp Team_GDDPtemp)
	drop if rowmiss > 0

*** construct temperature variables
	* team temperature 
	mkspline team_c_linspl1 50 team_c_linspl2 60 team_c_linspl3 = Team_tavg, di
	mkspline team_f_linspl1 50 team_f_linspl2 60 team_f_linspl3 = Team_GDDPtemp, di
	gen Team_tavg0 = 0
	mkspline team_0_linspl1 50 team_0_linspl2 60 team_0_linspl3 = Team_tavg0, di

	* meet temperature - cubic spline
	mkspline2 meet_c_cubspl= Meet_tavg, cubic di knots(49.6 56.7 63.8)
	mkspline2 meet_f_cubspl= Meet_GDDPtemp, cubic di knots(49.6 56.7 63.8)

	* meet temperature - linear spline with 2 knots
	mkspline meet_c_linspl1 50 meet_c_linspl2 60 meet_c_linspl3 = Meet_tavg, di
	mkspline meet_f_linspl1 50 meet_f_linspl2 60 meet_f_linspl3 = Meet_GDDPtemp, di

	* meet temperature - linear spline with 3 knots
	mkspline meet_c_3linspl1 50 meet_c_3linspl2 60 meet_c_3linspl3 70 meet_c_3linspl4 = Meet_tavg, di
	mkspline meet_f_3linspl1 50 meet_f_3linspl2 60 meet_f_3linspl3 70 meet_f_3linspl4 = Meet_GDDPtemp, di

	* backup meet and team temperature
	gen Meet_tavg_bkp = Meet_tavg 
	gen Team_tavg_bkp = Team_tavg 

*** initialize variables to store linear splines of team pre-exposure temperatures
	forvalues i = 1/3 {
		gen team_linspl`i' = .
	}

*** linear spline with 3 knots
	forvalues i = 1/4 {
		gen meet_3linspl`i' = .
	}

	* scenario 1
	estimates use "Semi_Parametric_Linear_Spline_3knots_Homo"

	forvalues i = 1/4 {
		replace meet_3linspl`i' = meet_c_3linspl`i'
	}
	predict yhat_c1_linear_spline3, xb

	forvalues i = 1/4 {
		replace meet_3linspl`i' = meet_f_3linspl`i'
	}
	predict yhat_f1_linear_spline3, xb

	* scenario 2
	estimates use "Semi_Parametric_Linear_Spline_3knots_Hetero"

	forvalues i = 1/4 {
		replace meet_3linspl`i' = meet_c_3linspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_c2_linear_spline3, xb

	forvalues i = 1/4 {
		replace meet_3linspl`i' = meet_f_3linspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_f_linspl`i'
	}
	predict yhat_f2_linear_spline3, xb

	* scenario 3
	estimates use "Semi_Parametric_Linear_Spline_3knots_Hetero"

	forvalues i = 1/4 {
		replace meet_3linspl`i' = meet_c_3linspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_c3_linear_spline3, xb

	forvalues i = 1/4 {
		replace meet_3linspl`i' = meet_f_3linspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_f3_linear_spline3, xb

	* scenario 4
	estimates use "Semi_Parametric_Linear_Spline_3knots_Hetero"
	forvalues i = 1/4 {
		replace meet_3linspl`i' = meet_c_3linspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_0_linspl`i'
	}
	predict yhat_c4_linear_spline3, xb

	forvalues i = 1/4 {
		replace meet_3linspl`i' = meet_f_3linspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_0_linspl`i'
	}
	predict yhat_f4_linear_spline3, xb

*** quadratic
	* scenario 1
	estimates use "Semi_Parametric_Quadratic_Homo"

	replace Meet_tavg = Meet_tavg_bkp
	predict yhat_c1_quadratic, xb

	replace Meet_tavg = Meet_GDDPtemp
	predict yhat_f1_quadratic, xb

	* scenario 2
	estimates use "Semi_Parametric_Quadratic_Hetero"
	replace Meet_tavg = Meet_tavg_bkp
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_c2_quadratic, xb

	replace Meet_tavg = Meet_GDDPtemp
	forvalues i = 1/3 {
		replace team_linspl`i' = team_f_linspl`i'
	}
	predict yhat_f2_quadratic, xb

	* scenario 3
	estimates use "Semi_Parametric_Quadratic_Hetero"
	replace Meet_tavg = Meet_tavg_bkp
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_c3_quadratic, xb

	replace Meet_tavg = Meet_GDDPtemp
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_f3_quadratic, xb

	* scenario 4
	estimates use "Semi_Parametric_Quadratic_Hetero"
	replace Meet_tavg = Meet_tavg_bkp
	forvalues i = 1/3 {
		replace team_linspl`i' = team_0_linspl`i'
	}
	predict yhat_c4_quadratic, xb

	replace Meet_tavg = Meet_GDDPtemp
	forvalues i = 1/3 {
		replace team_linspl`i' = team_0_linspl`i'
	}
	predict yhat_f4_quadratic, xb

*** cubic
	* scenario 1
	estimates use "Semi_Parametric_Cubic_Homo"

	replace Meet_tavg = Meet_tavg_bkp
	predict yhat_c1_cubic, xb

	replace Meet_tavg = Meet_GDDPtemp
	predict yhat_f1_cubic, xb

	* scenario 2
	estimates use "Semi_Parametric_Cubic_Hetero"
	replace Meet_tavg = Meet_tavg_bkp
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_c2_cubic, xb

	replace Meet_tavg = Meet_GDDPtemp
	forvalues i = 1/3 {
		replace team_linspl`i' = team_f_linspl`i'
	}
	predict yhat_f2_cubic, xb

	* scenario 3
	estimates use "Semi_Parametric_Cubic_Hetero"
	replace Meet_tavg = Meet_tavg_bkp
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_c3_cubic, xb

	replace Meet_tavg = Meet_GDDPtemp
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_f3_cubic, xb

	* scenario 4
	estimates use "Semi_Parametric_Cubic_Hetero"
	replace Meet_tavg = Meet_tavg_bkp
	forvalues i = 1/3 {
		replace team_linspl`i' = team_0_linspl`i'
	}
	predict yhat_c4_cubic, xb

	replace Meet_tavg = Meet_GDDPtemp
	forvalues i = 1/3 {
		replace team_linspl`i' = team_0_linspl`i'
	}
	predict yhat_f4_cubic, xb

*** linear spline with 2 knots
	forvalues i = 1/3 {
		gen meet_linspl`i' = .
	}

	* scenario 1
	estimates use "Semi_Parametric_Linear_Spline_2Knots_Homo"
	forvalues i = 1/3 {
		replace meet_linspl`i' = meet_c_linspl`i'
	}
	predict yhat_c1_linear_spline2, xb

	forvalues i = 1/3 {
		replace meet_linspl`i' = meet_f_linspl`i'
	}
	predict yhat_f1_linear_spline2, xb

	* scenario 2
	estimates use "Semi_Parametric_Linear_Spline_2Knots_Hetero"
	forvalues i = 1/3 {
		replace meet_linspl`i' = meet_c_linspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_c2_linear_spline2, xb

	forvalues i = 1/3 {
		replace meet_linspl`i' = meet_f_linspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_f_linspl`i'
	}
	predict yhat_f2_linear_spline2, xb

	* scenario 3
	estimates use "Semi_Parametric_Linear_Spline_2Knots_Hetero"
	forvalues i = 1/3 {
		replace meet_linspl`i' = meet_c_linspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_c3_linear_spline2, xb

	forvalues i = 1/3 {
		replace meet_linspl`i' = meet_f_linspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_f3_linear_spline2, xb

	* scenario 4
	estimates use "Semi_Parametric_Linear_Spline_2Knots_Hetero"
	forvalues i = 1/3 {
		replace meet_linspl`i' = meet_c_linspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_0_linspl`i'
	}
	predict yhat_c4_linear_spline2, xb

	forvalues i = 1/3 {
		replace meet_linspl`i' = meet_f_linspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_0_linspl`i'
	}
	predict yhat_f4_linear_spline2, xb

*** cubic spline
	forvalues i = 1/2 {
		gen meet_cubspl`i' = .
	}

	* scenario 1
	estimates use "Semi_Parametric_Cubic_Spline_Homo"
	forvalues i = 1/2 {
		replace meet_cubspl`i' = meet_c_cubspl`i'
	}
	predict yhat_c1_cubic_spline, xb

	forvalues i = 1/2 {
		replace meet_cubspl`i' = meet_f_cubspl`i'
	}
	predict yhat_f1_cubic_spline, xb

	* scenario 2
	estimates use "Semi_Parametric_Cubic_Spline_Hetero"
	forvalues i = 1/2 {
		replace meet_cubspl`i' = meet_c_cubspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_c2_cubic_spline, xb

	forvalues i = 1/2 {
		replace meet_cubspl`i' = meet_f_cubspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_f_linspl`i'
	}
	predict yhat_f2_cubic_spline, xb

	* scenario 3
	estimates use "Semi_Parametric_Cubic_Spline_Hetero"
	forvalues i = 1/2 {
		replace meet_cubspl`i' = meet_c_cubspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_c3_cubic_spline, xb

	forvalues i = 1/2 {
		replace meet_cubspl`i' = meet_f_cubspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_c_linspl`i'
	}
	predict yhat_f3_cubic_spline, xb

	* scenario 4
	estimates use "Semi_Parametric_Cubic_Spline_Hetero"
	forvalues i = 1/2 {
		replace meet_cubspl`i' = meet_c_cubspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_0_linspl`i'
	}
	predict yhat_c4_cubic_spline, xb

	forvalues i = 1/2 {
		replace meet_cubspl`i' = meet_f_cubspl`i'
	}
	forvalues i = 1/3 {
		replace team_linspl`i' = team_0_linspl`i'
	}
	predict yhat_f4_cubic_spline, xb

*** calculate percentage changes
	foreach model in linear_spline3 quadratic cubic linear_spline2 cubic_spline {
		gen diff1_`model' = yhat_f1_`model' - yhat_c1_`model'
		gen diff2_`model' = yhat_f2_`model' - yhat_c2_`model'
		gen diff3_`model' = yhat_f3_`model' - yhat_c3_`model'
		gen diff4_`model' = yhat_f4_`model' - yhat_c4_`model'

		gen percent1_`model' = (yhat_f1_`model' - yhat_c1_`model') / abs(norm_result)
		gen percent2_`model' = (yhat_f2_`model' - yhat_c2_`model') / abs(norm_result)
		gen percent3_`model' = (yhat_f3_`model' - yhat_c3_`model') / abs(norm_result)
		gen percent4_`model' = (yhat_f4_`model' - yhat_c4_`model') / abs(norm_result)
	}

*** calculate mean changes
	collapse (mean) diff1* diff2* diff3* diff4* percent1* percent2* percent3* percent4*, by(ath Team_zip Team_tavg)
	foreach model in linear_spline3 quadratic cubic linear_spline2 cubic_spline {
	forvalues i = 1/4 {
		replace percent`i'_`model' = percent`i'_`model' * 100
	}
	}

	collapse (mean) diff1_linear_spline3 diff1_cubic_spline diff1_linear_spline2 diff1_quadratic diff1_cubic ///
					diff2_linear_spline3 diff2_cubic_spline diff2_linear_spline2 diff2_quadratic diff2_cubic ///
					diff3_linear_spline3 diff3_cubic_spline diff3_linear_spline2 diff3_quadratic diff3_cubic ///
					diff4_linear_spline3 diff4_cubic_spline diff4_linear_spline2 diff4_quadratic diff4_cubic ///
					percent1_linear_spline3 percent1_cubic_spline percent1_linear_spline2 percent1_quadratic percent1_cubic ///
					percent2_linear_spline3 percent2_cubic_spline percent2_linear_spline2 percent2_quadratic percent2_cubic ///
					percent3_linear_spline3 percent3_cubic_spline percent3_linear_spline2 percent3_quadratic percent3_cubic ///
					percent4_linear_spline3 percent4_cubic_spline percent4_linear_spline2 percent4_quadratic percent4_cubic ///
			 (semean) se1diff_linear_spline3 = diff1_linear_spline3 ///
			 		  se1diff_cubic_spline   = diff1_cubic_spline ///
			 		  se1diff_linear_spline2 = diff1_linear_spline2 ///
			 		  se1diff_quadratic 	 = diff1_quadratic ///
			 		  se1diff_cubic  		 = diff1_cubic ///
			 		  se2diff_linear_spline3 = diff2_linear_spline3 ///
			 		  se2diff_cubic_spline   = diff2_cubic_spline ///
			 		  se2diff_linear_spline2 = diff2_linear_spline2 ///
			 		  se2diff_quadratic      = diff2_quadratic ///
			 		  se2diff_cubic          = diff2_cubic ///
			 		  se3diff_linear_spline3 = diff3_linear_spline3 ///
			 		  se3diff_cubic_spline   = diff3_cubic_spline ///
			 		  se3diff_linear_spline2 = diff3_linear_spline2 ///
			 		  se3diff_quadratic      = diff3_quadratic ///
			 		  se3diff_cubic          = diff3_cubic ///
			 		  se4diff_linear_spline3 = diff4_linear_spline3 ///
			 		  se4diff_cubic_spline   = diff4_cubic_spline ///
			 		  se4diff_linear_spline2 = diff4_linear_spline2 ///
			 		  se4diff_quadratic      = diff4_quadratic ///
			 		  se4diff_cubic          = diff4_cubic ///
			 		  se1percent_linear_spline3 = percent1_linear_spline3 ///
			 		  se1percent_cubic_spline   = percent1_cubic_spline ///
			 		  se1percent_linear_spline2 = percent1_linear_spline2 ///
			 		  se1percent_quadratic 	 	= percent1_quadratic ///
			 		  se1percent_cubic  		= percent1_cubic ///
			 		  se2percent_linear_spline3 = percent2_linear_spline3 ///
			 		  se2percent_cubic_spline   = percent2_cubic_spline ///
			 		  se2percent_linear_spline2 = percent2_linear_spline2 ///
			 		  se2percent_quadratic      = percent2_quadratic ///
			 		  se2percent_cubic          = percent2_cubic ///
			 		  se3percent_linear_spline3 = percent3_linear_spline3 ///
			 		  se3percent_cubic_spline   = percent3_cubic_spline ///
			 		  se3percent_linear_spline2 = percent3_linear_spline2 ///
			 		  se3percent_quadratic      = percent3_quadratic ///
			 		  se3percent_cubic          = percent3_cubic ///
			 		  se4percent_linear_spline3 = percent4_linear_spline3 ///
			 		  se4percent_cubic_spline   = percent4_cubic_spline ///
			 		  se4percent_linear_spline2 = percent4_linear_spline2 ///
			 		  se4percent_quadratic      = percent4_quadratic ///
			 		  se4percent_cubic          = percent4_cubic 
	gen i = _n
	reshape long diff1 diff2 diff3 diff4 se1diff se2diff se3diff se4diff percent1 percent2 percent3 percent4 se1percent se2percent se3percent se4percent, i(i) j(model) string
	order diff1 se1diff diff2 se2diff diff3 se3diff diff4 se4diff percent1 se1percent percent2 se2percent percent3 se3percent percent4 se4percent
	replace i = 2 if model == "_quadratic"
	replace i = 3 if model == "_cubic"
	replace i = 4 if model == "_linear_spline2"
	replace i = 5 if model == "_cubic_spline"
	sort i

*** export results 
	export excel using "Estimated_Performance_Change.xlsx", firstrow(var) replace


********************************************************************************
* Figure 3: distribution of bootstrap estimates of mean performance losses
********************************************************************************

*** set results folder
	cd "$PATH/Results/Simulations/Bootstrap"

*----------------------------regression estimates------------------------------*

*** load data set 
	use "$PATH/Data/TF_Data.dta", clear

*** define control variables and fixed effects
	global weather_control wind Meet_preciptotal Meet_ozone travel_dist 
	global absorb_var athlete season_week event_code##(venue home1) year_code i.Meet_dew_bin

*** drop the observations that have missing values on key variables
	drop if Meet_tavg == . | Team_tavg == . | Meet_preciptotal ==. | Meet_ozone == .

*** drop training temperature lower than 40F to mitigate noise
	/*avoid the case where athletes might get trained indoors during extreme cold weathers*/
	drop if Team_tavg <= 40 

*** set clustered standard error
	global cluster_se meet_code athlete

*** keep only endurance events
	keep if event_type == "Endurance"

*** construct linear splines 
	mkspline team_linspl1  50 team_linspl2  60 team_linspl3 = Team_tavg, di
	mkspline meet_3linspl1 50 meet_3linspl2 60 meet_3linspl3 70 meet_3linspl4 = Meet_tavg, di

*** bootstrap 1000 times
	set seed 100
	forvalues i = 1/1000 {
		preserve
			bsample, cluster(athlete)

			reghdfe norm_result c.meet_3linspl* $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
			estimates save "Semi_Parametric_Bootstrap_Homo`i'", replace

			reghdfe norm_result c.meet_3linspl* (c.team_linspl1 c.team_linspl2 c.team_linspl3)#c.meet_3linspl* $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
			estimates save "Semi_Parametric_Bootstrap_Hetero`i'", replace
		restore
		disp "-----`i'/1000 finished-----"
	}

*-----------------------calculate performance changes--------------------------*
	
*** load data set 
	use "$PATH/Data/TF_Data.dta", clear

*** keep only endurance events
	keep if event_type == "Endurance"

*** drop training temperature lower than 40F to mitigate noise
	/*avoid the case where athletes might get trained indoors during extreme cold weathers*/
	drop if Team_tavg <= 40 

*** set scenario
	gen Meet_GDDPtemp = Meet_RCP85temp
	gen Team_GDDPtemp = Team_RCP85temp 

*** only keep weather variables
	keep athlete Meet_tavg Team_tavg Meet_GDDPtemp Team_GDDPtemp Team_zip norm_result

*** set other control variables at the mean level
	gen wind = -0.0000445
	gen Meet_preciptotal = 0.0819
    gen Meet_ozone = 0.0367131
    gen travel_dist = 277.2341

*** drop observations with missing data
	egen rowmiss = rowmiss(Meet_tavg Team_tavg Meet_GDDPtemp Team_GDDPtemp)
	drop if rowmiss > 0

*** construct linear spline for team temperature
	mkspline team_c_linspl1 50 team_c_linspl2 60 team_c_linspl3 = Team_tavg, di
	mkspline team_f_linspl1 50 team_f_linspl2 60 team_f_linspl3 = Team_GDDPtemp, di

*** meet temperature - linear spline with 3 knots
	mkspline meet_c_3linspl1 50 meet_c_3linspl2 60 meet_c_3linspl3 70 meet_c_3linspl4 = Meet_tavg, di
	mkspline meet_f_3linspl1 50 meet_f_3linspl2 60 meet_f_3linspl3 70 meet_f_3linspl4 = Meet_GDDPtemp, di

*** prediction
	forvalues i = 1/3 {
		gen team_linspl`i' = .
	}
	forvalues i = 1/4 {
		gen meet_3linspl`i' = .
	}

	forvalues k = 1/1000 {

		* scenario 1: absent explicit accounting for adaptation
		estimates use "Semi_Parametric_Bootstrap_Homo`k'"

		forvalues i = 1/4 {
			replace meet_3linspl`i' = meet_c_3linspl`i'
		}
		predict yhat_c1, xb

		forvalues i = 1/4 {
			replace meet_3linspl`i' = meet_f_3linspl`i'
		}
		predict yhat_f1, xb

		gen percent1_`k' = (yhat_f1 - yhat_c1) / abs(norm_result)
		drop yhat_f1 yhat_c1

		* scenario 2: accounting for future adaptation
		estimates use "Semi_Parametric_Bootstrap_Hetero`k'"

		forvalues i = 1/4 {
			replace meet_3linspl`i' = meet_c_3linspl`i'
		}
		forvalues i = 1/3 {
			replace team_linspl`i' = team_c_linspl`i'
		}
		predict yhat_c2, xb

		forvalues i = 1/4 {
			replace meet_3linspl`i' = meet_f_3linspl`i'
		}
		forvalues i = 1/3 {
			replace team_linspl`i' = team_f_linspl`i'
		}
		predict yhat_f2, xb

		gen percent2_`k' = (yhat_f2 - yhat_c2) / abs(norm_result)
		drop yhat_f2 yhat_c2

	}

*** calculate the average change in performance 
	keep athlete Team_zip Team_tavg percent*
	foreach i in 1 2 {
	forvalues boots = 1/1000 {
		replace percent`i'_`boots' = percent`i'_`boots' * 100
	}
	}
	collapse (mean) percent1_1 - percent2_1000, by(athlete Team_zip Team_tavg)
	collapse (mean) percent1_1 - percent2_1000
	gen ID = _n
	reshape long percent1_ percent2_, i(ID) j(boots)
	tabstat percent1 percent2, s(mean semean)
	drop ID

*** save the bootstrapping results
	save "Bootstrap_Result.dta", replace

*---------------------------------plotting------------------------------------*
	
grstyle init 
grstyle set plain, nogrid horizontal

twoway ///
	(histogram percent1, fcolor(gs10) lcolor(gs14) lwidth(vthin)) ///
	(histogram percent2, fcolor(black) lcolor(gs14) lwidth(vthin)), ///
	legend(size(vsmall) ///
	order(1 "Absent Explicit Accounting for Adaptation" 2 "Accounting for Future Adaptation") position(1) ring(0) col(1) region(fcolor(none) lcolor(none))) ///
	xtitle(Percentage Change in Performance (%)) 
graph export "Bootstrap_Distribution.eps", replace


********************************************************************************
* Table A3: semi-parametric estimation
********************************************************************************

*** set results folder
	cd "$PATH/Results/Simulations/Baseline"

*** load data set 
	use "$PATH/Data/TF_Data.dta", clear

*** define control variables and fixed effects
	global weather_control wind Meet_preciptotal Meet_ozone travel_dist 
	global absorb_var athlete season_week event_code##(venue home1) year_code i.Meet_dew_bin

*** drop the observations that have missing values on key variables
	drop if Meet_tavg == . | Team_tavg == . | Meet_preciptotal ==. | Meet_ozone == .

*** drop training temperature lower than 40F to mitigate noise
	/*avoid the case where athletes might get trained indoors during extreme cold weathers*/
	drop if Team_tavg <= 40 

*** set clustered standard error
	global cluster_se meet_code athlete

*** keep only endurance events
	keep if event_type == "Endurance"

*** construct linear splines for pre-exposure temperatures
	mkspline team_linspl1 50 team_linspl2 60 team_linspl3 = Team_tavg, di

*** linear spline with 3 knots
	cap drop meet_3linspl*
	mkspline meet_3linspl1 50 meet_3linspl2 60 meet_3linspl3 70 meet_3linspl4  = Meet_tavg, di

	reghdfe norm_result c.meet_3linspl* $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
	outreg2 using Semi_Parametric_Linear_Spline_3knots.xls, excel drop($weather_control) adds(Adjusted r2, e(r2_a), Number of Athletes, e(N_clust2)) 

	reghdfe norm_result c.meet_3linspl* (c.team_linspl1 c.team_linspl2 c.team_linspl3)#c.meet_3linspl* $weather_control, absorb($absorb_var) vce(cluster meet_code athlete)
	outreg2 using Semi_Parametric_Linear_Spline_3knots.xls, excel drop($weather_control) adds(Adjusted r2, e(r2_a), Number of Athletes, e(N_clust2)) 

********************************************************************************
* Figure A3: predicted temperature-performance relationship in 2-D graph
********************************************************************************

*** set results folder
	cd "$PATH/Results/Simulations/Baseline"

*** construct a dataset
	clear
	set obs 600
	gen i = _n
	gen double Team_tavg = 35 if i <= 100
	replace Team_tavg = 45 if i>100 & i <=200
	replace Team_tavg = 55 if i>200 & i <=300
	replace Team_tavg = 65 if i>300 & i <=400
	replace Team_tavg = 75 if i>400 & i <=500
	replace Team_tavg = 54.768 if i>500 & i <=600
	gen Meet_tavg = i * 90 / 100 if i <= 100
	replace Meet_tavg = (i-100) * 90 /100 if i > 100 & i <= 200
	replace Meet_tavg = (i-200) * 90 /100 if i > 200 & i <= 300
	replace Meet_tavg = (i-300) * 90 /100 if i > 300 & i <= 400
	replace Meet_tavg = (i-400) * 90 /100 if i > 400 & i <= 500
	replace Meet_tavg = (i-500) * 90 /100 if i > 500 & i <= 600

	mkspline meet_3linspl1 50 meet_3linspl2 60 meet_3linspl3 70 meet_3linspl4 = Meet_tavg, di
	mkspline meet_linspl1 50 meet_linspl2 60 meet_linspl3 = Meet_tavg, di
	mkspline2 meet_cubspl= Meet_tavg, cubic di knots(49.6 56.7 63.8)
	mkspline team_linspl1 50 team_linspl2 60 team_linspl3 = Team_tavg, di

*** set other control variables at the mean level
	gen wind = -0.0000445
	gen Meet_preciptotal = 0.0819
    gen Meet_ozone = 0.0367131
    gen travel_dist = 277.2341

*** plotting
	foreach model in Linear_Spline_3knots Linear_Spline_2Knots Cubic_Spline Quadratic Cubic { 
		preserve

			estimates use "Semi_Parametric_`model'_Hetero"
			predict yhat, xb
			predict sehat, stdp
			gen yhat_p2se = yhat + 2*sehat
			gen yhat_m2se = yhat - 2*sehat

			keep if Meet_tavg >= 30
			grstyle init
			grstyle set plain, nogrid
			twoway ///
				 (line yhat Meet_tavg if Team_tavg==35, lwidth(medthick) lcolor(navy)  cmissing(y) sort) ///
				 (line yhat Meet_tavg if Team_tavg==45, lwidth(medthick) lcolor(blue)  cmissing(y) sort) ///
				 (line yhat Meet_tavg if Team_tavg==55, lwidth(medthick) lcolor(gs8)  cmissing(y) sort) ///
				 (line yhat Meet_tavg if Team_tavg==65, lwidth(medthick) lcolor(orange)  cmissing(y) sort) ///
				 (line yhat Meet_tavg if Team_tavg==75, lwidth(medthick) lcolor(red)  cmissing(y) sort) ///
				 , ///
				 legend(size(small) order(1 "35" 2 "45" 3 "55" 4 "65" 5 "75") title(Pre-Exposure (F), size(small)) position(7) ring(0) col(1)) ///
				 xtitle("Temperature (F)") ///
				 ytitle("") ///
				 xlabel(30(5)90) ///
				 scale(1.2) ///
				 title(`model')
			graph save "Prediction_`model'.gph", replace
		restore
	}

	graph use "Prediction_Linear_Spline_3Knots.gph"
	gr_edit .title.text = {}
	gr_edit .title.text.Arrpush Linear Spline with 3 Knots
	graph export "Prediction_Linear_Spline_3Knots.pdf", replace

	graph use "Prediction_Cubic_Spline.gph"
	gr_edit .title.text = {}
	gr_edit .title.text.Arrpush Cubic Spline
	graph save "Prediction_Cubic_Spline.gph", replace

	graph use "Prediction_Linear_Spline_2Knots.gph"
	gr_edit .title.text = {}
	gr_edit .title.text.Arrpush Linear Spline with 2 Knots
	graph save "Prediction_Linear_Spline_2Knots.gph", replace

	graph combine "Prediction_Quadratic.gph" "Prediction_Cubic.gph", xsize(10) col(2)
	graph export "Prediction_Polynomial.pdf", replace
	graph combine "Prediction_Linear_Spline_2Knots.gph" "Prediction_Cubic_Spline.gph", xsize(10) col(2) 
	graph export "Prediction_Spline.pdf", replace

*** delete intermediate figures 
	foreach v in Linear_Spline_3Knots Linear_Spline_2Knots Cubic_Spline Quadratic Cubic { 
		erase "Prediction_`v'.gph"
	}


********************************************************************************
* Figure A2: predicted temperature-performance relationship in 3-D graph
********************************************************************************

*** set results folder
	cd "$PATH/Results/Simulations/Baseline"

*** calculate predictions and export to excel
	clear

	set obs 100
	gen i = _n
	gen Meet_tavg = 30 + i * 60 / 100 

	forvalues i = 1/100 {
		gen Meet_tavg`i' = Meet_tavg
	}
	drop Meet_tavg

	reshape long Meet_tavg, i(i) j(j)
	gen Team_tavg = 40 + j * 50 / 100

	mkspline meet_3linspl1 50 meet_3linspl2 60 meet_3linspl3 70 meet_3linspl4 = Meet_tavg, di
	mkspline meet_linspl1 50 meet_linspl2 60 meet_linspl3 = Meet_tavg, di
	mkspline2 meet_cubspl= Meet_tavg, cubic di knots(49.6 56.7 63.8)
	mkspline team_linspl1 50 team_linspl2 60 team_linspl3 = Team_tavg, di

	gen wind = -0.0000445
	gen Meet_preciptotal = 0.0819
    gen Meet_ozone = 0.0367131
    gen travel_dist = 277.2341

	foreach model in Linear_Spline_3knots Linear_Spline_2Knots Cubic_Spline Quadratic Cubic { 
		estimates use "Semi_Parametric_`model'_Hetero"
		predict y_`model', xb		
	}

	keep i j Meet_tavg Team_tavg y_*
	export excel using "Semi_Parametric_Model_Prediction_3D.xlsx", firstrow(var) replace

*** use R script to create 3-D graphs
	* run "Plot_3D.R"
